# Remove every visible object from the current environment before running.
rm(list = ls())
library(data.table)

# Directory the raw per-state csv files are read from.
in.path <- '~/Research_Group Dropbox/Jacob Brown/RCE/Output_Voter_Files/2008 RDD - Jake/state files/'
# Directory the formatted per-state .Rdata files are written to.
out.path <- '~/Research_Group Dropbox/Jacob Brown/Obama Effect/Replication/data/voterfile/'


# This script pulls the raw L2 data for each state in the analysis, and formats the variables in preparation for the next script,
# which aggregates counts of each variable by birthdate cohort.

# Two-letter codes of the states included in the analysis.
states <- c("AL", "AR", "CA", "CT", "FL", "IL", "IN", "KS", "KY", "MA", "MD",
            "MO",
            "NE", "NV", "NY", "OH", "OR", "PA", "RI", "SC", "TN", "TX", "VA", "WA",
            "WV")

# Input files: csv files in `in.path` whose name starts with one of the
# analysis states. The extension is anchored ('\\.csv$') so that files which
# merely contain "csv" somewhere in their name are not picked up.
files <- list.files(in.path, pattern = '\\.csv$')
files <- files[substr(files, 1, 2) %in% states]

# Surface states without an input file (this also covers a wrong or empty
# `in.path`) instead of silently producing no output for them.
if (!all(states %in% substr(files, 1, 2))) {
  warning('No csv file found in ', in.path, ' for state(s): ',
          paste(setdiff(states, substr(files, 1, 2)), collapse = ', '),
          call. = FALSE)
}


# Read one raw state csv and return a data.table holding only the formatted
# variables, in this column order:
#   lalvoterid, race, birth.date, gender, party,
#   asian, black, hispanic, white, female, democrat, republican,
#   vote.gen2000, vote.gen2002, ..., vote.gen2016
#
# path: full path of the csv file to read.
#
# Stops with an error if the file lacks any of the identifying columns or any
# of the 2008-2016 general-election columns. The 2000-2006 general-election
# columns are optional; when one is absent its vote.gen* column is all NA.
prep.state.file <- function(path) {
  # raw column name = formatted column name
  id.cols <- c(LALVOTERID = 'lalvoterid',
               EthnicGroups_EthnicGroup1Desc = 'race',
               Voters_BirthDate = 'birth.date',
               Voters_Gender = 'gender',
               Parties_Description = 'party')
  turnout.cols <- c(General_2000_11_07 = 'vote.gen2000',
                    General_2002_11_05 = 'vote.gen2002',
                    General_2004_11_02 = 'vote.gen2004',
                    General_2006_11_07 = 'vote.gen2006',
                    General_2008_11_04 = 'vote.gen2008',
                    General_2010_11_02 = 'vote.gen2010',
                    General_2012_11_06 = 'vote.gen2012',
                    General_2014_11_04 = 'vote.gen2014',
                    General_2016_11_08 = 'vote.gen2016')
  optional.raw <- c('General_2000_11_07',
                    'General_2002_11_05',
                    'General_2004_11_02',
                    'General_2006_11_07')
  required.raw <- c(names(id.cols), setdiff(names(turnout.cols), optional.raw))

  # Read only the header first, so that columns this file does not have are
  # left out of `select` rather than requested and skipped with a warning.
  header <- names(fread(path, nrows = 0))
  absent <- setdiff(required.raw, header)
  if (length(absent) > 0) {
    stop('File ', path, ' is missing required column(s): ',
         paste(absent, collapse = ', '), call. = FALSE)
  }
  dt <- fread(path,
              select = intersect(c(names(id.cols), names(turnout.cols)), header))

  # rename variables (in place, no copy of the columns)
  setnames(dt, names(id.cols), unname(id.cols))

  # make race variables
  dt[, asian := as.numeric(race == 'East and South Asian')]
  dt[, black := as.numeric(race == 'Likely African-American')]
  dt[, hispanic := as.numeric(race == 'Hispanic and Portuguese')]
  dt[, white := as.numeric(race == 'European')]

  # make female variable
  dt[, female := as.numeric(gender == 'F')]

  # make party variables
  dt[, democrat := as.numeric(party == 'Democratic')]
  dt[, republican := as.numeric(party == 'Republican')]

  # recode turnout variables: 1 if the raw value is 'Y', 0 for anything else
  # (including missing values); the raw column is dropped once recoded
  for (raw in names(turnout.cols)) {
    new <- turnout.cols[[raw]]
    if (raw %in% names(dt)) {
      raw.votes <- dt[[raw]]
      dt[, (new) := as.numeric(!is.na(raw.votes) & raw.votes == 'Y')]
      dt[, (raw) := NULL]
    } else {
      # NA_real_ keeps the column numeric, like the recoded 0/1 columns
      dt[, (new) := NA_real_]
    }
  }

  # put the renamed variables first, in a fixed order that does not depend on
  # how the columns are ordered in the csv
  setcolorder(dt, unname(id.cols))
  dt
}

for (f in files) {

  # the object must be called `data`: save() stores it under that name
  data <- prep.state.file(paste0(in.path, f))

  # output file is named after the lower-cased two-letter state prefix
  st <- tolower(substr(f, 1, 2))
  save(data, file = paste0(out.path, st, '_voterfile_rdd.Rdata'))
  print(st)

  # free the state's data before reading the next file
  rm(data, st)
  gc()

}
